*******************************************************************;
***table2.sas                                                   ***;
*******************************************************************;
***This program calculates shipments-weighted and               ***;
***purchase-weighted variance decompositions shown in Table 2   ***;
***of the Davis, Grim, Haltiwanger, and Streitwieser 2013 RESTAT***;
***paper.                                                       ***;
*******************************************************************;
***Output:                                                      ***;
*** csv/table2_tvs.csv                                          ***;
*** csv/table2_pe.csv                                           ***;
*******************************************************************;

options ls=200 ps=max obs=max;

****START MACROS****;

*Macro to sort dataset by year;

%macro sortit(name);

proc sort data=&name;
	by year;

%mend sortit;

*Macro to compute within and between variances by year for each location;

%macro doit1(loc, var);

%*Calculate the location specific mean;

proc sort data=gs6300;
	by year &var;

proc means data=gs6300 noprint;
	var lpe;
	by year &var;
	weight wt&var1;
	output out=mean&loc mean=mean;

data stat&loc (keep = mean_yr_&loc year &var);
	set mean&loc;
	mean_yr_&loc = mean;

%*Merge the location mean back to the plant level data in gs6300;

proc sort data=stat&loc;
	by year &var;

data gs6300;
	merge gs6300 (in=a) stat&loc;
	by year &var;
	if a;

%*Compute the squared deviation of the plant lpe from location mean lpe;

data gs6300;
	set gs6300;
	sqdev&loc = (lpe - mean_yr_&loc)*(lpe - mean_yr_&loc);

%*Sum up all the squared deviations of the plants in that location using the sample weights 
and create number of plants in each location;

proc means data=gs6300 noprint ;
	by year &var;
	output out=sum&loc
	sum(sqdev&loc) = sumsqdev&loc
	sum(num) = num&loc;
	var sqdev&loc num;
	weight wt&var1;

%*Compute a location weight which is equal to the weighted number of plants in the 
location divided by the TOTAL weighted number of plants in the entire country;

data sum&loc (keep = year &var num&loc sumsqdev&loc);
	set sum&loc;

%*Merge the num&loc and sumsqdev&loc onto stat&loc;

proc sort data=stat&loc;
	by year &var;

proc sort data=sum&loc;
	by year &var;

data stat&loc;
	merge stat&loc (in=a) sum&loc (in=b);
	by year &var;
	if a;

%*Merge numus and mean_yr_tot onto stat&loc;
%*Calculate wt&loc;

proc sort data=gs6300 nodupkey out=&loc.wt (keep = year &var numus mean_yr_tot);
	by year &var;

data stat&loc;
	merge stat&loc &loc.wt;
	by year &var;
	wt&loc = num&loc/numus;
	sumsqdev&loc = sumsqdev&loc / num&loc;

%*Compute within location variance as the weighted sum of sumsqdev&loc using wtloc;

proc means data=stat&loc noprint ;
	by year;
	output out=wtnus&loc
	sum(sumsqdev&loc) = var_wtn_&loc;
	var sumsqdev&loc;
	weight wt&loc;

%**Compute between location variance;

%*Take location specific mean and deviate from grand mean
(grand mean is the sample weighted grand mean for the year);
%*Square this deviation;

data stat&loc;
	set stat&loc;
	sqdevbtn&loc = (mean_yr_&loc - mean_yr_tot)*(mean_yr_&loc - mean_yr_tot);

%*Sum up these squared deviations using wt&loc;

proc means data=stat&loc noprint ;
	by year;
	output out=btnus&loc
	sum(sqdevbtn&loc) = var_btn_&loc;
	var sqdevbtn&loc;
	weight wt&loc;

%mend doit1;


*Macro to do variance decompositions;

%macro doitall(var1);

data gs6300;
	set elec.gs6300 (keep = year ppn lpe cyfstn bigutil 
		nerc_bu fipsst decile wt centile tvs va pe newind grid);

proc sort data=gs6300;
	by year ppn;

data gs6300 (drop = sizech bigutilch sizech2 buch2 sz3 bu3 szbu rankch
		rankch2  rk3  rkbu );
	set gs6300;
	by year ppn;
	wt100=wt*100;
	num=1;
	
	*Get rid of missing values;
	*There should be none;
	if lpe ne .;
	if nerc_bu ne .;
	if bigutil ne .;
	if cyfstn ne .;
	if fipsst ne .;
	if decile ne .;
	if centile ne .;
	if tvs ne .;
	if va ne .;
	if pe ne .;

	*Create variable that is a unique combination of big utility and decile;
	sizech = put(decile, 2.);
	bigutilch = put(bigutil, 8.);
	sizech2 = right(sizech);
	buch2 = right(bigutilch);
	sz3 = translate(sizech2, '0', ' ');
	bu3 = translate(buch2, '0', ' ');
	szbu = bu3||sz3;
	bubysz = input(szbu, 10.);

	*Create variable that is a unique combination of big utility and centile (not pooled 
	across years);
	rankch = put(centile, 3.);
	bigutilch = put(bigutil, 8.);
	rankch2 = right(rankch);
	buch2 = right(bigutilch);
	rk3 = translate(rankch2, '0', ' ');
	bu3 = translate(buch2, '0', ' ');
	rkbu = bu3||rk3;
	bubyrk = input(rkbu, 11.);

	wt&var1 = wt*&var1;

*Compute grand mean for each year;

proc means data=gs6300 noprint;
	var lpe;
	by year;
	weight wt&var1;	
	output out=meantot mean=mean;

data meantot (keep = year mean_yr_tot);
	set meantot;
	mean_yr_tot = mean;

*Compute total weighted number of plants in US in each year;
proc means data=gs6300 noprint ;
	by year;
	output out=sumus
	sum(num) = numus;
	var num;
	weight wt&var1;

*Merge grand mean and total weighted number of plants in US to gs6300;

proc sort data=sumus (keep = year numus);
	by year;

proc sort data=meantot;
	by year;

proc sort data=gs6300;
	by year;

data gs6300;
	merge gs6300 meantot sumus;
	by year;

***Compute within and between variances by year for each location;


%doit1(nr, nerc_bu)
%doit1(bu, bigutil)
%doit1(cy, cyfstn)
%doit1(st, fipsst)
%doit1(sz, decile)
%doit1(rk, centile)
%doit1(bs, bubysz)
%doit1(br, bubyrk)
%doit1(id, newind)
%doit1(gr, grid)

**Cross-check;

data gs6300;
	set gs6300;
	sqdevtot = (lpe - mean_yr_tot)*(lpe - mean_yr_tot);

proc means data=gs6300 noprint;
	by year;
	output out=totus
	sum(sqdevtot) = sum_tot;
	var sqdevtot;
	weight wt&var1;

proc sort data=totus (keep = year sum_tot);
	by year;

proc sort data=gs6300;
	by year;
	
data gs6300;
	merge gs6300 totus;
	by year;
	var_tot = sum_tot / numus;

*Create data set with var_tot and year only;

data vartot;
	set gs6300 (keep = year var_tot numus);

proc sort data = vartot nodupkey;
	by year;	

*Create data set with between, within, and total variance for all locations by year;

*Sort all data sets by year;

%sortit(wtnuscy) 
%sortit(btnuscy) 
%sortit(wtnusbu) 
%sortit(btnusbu) 
%sortit(wtnusnr)
%sortit(btnusnr) 
%sortit(wtnusst) 
%sortit(btnusst)
%sortit(wtnussz) 
%sortit(btnussz)
%sortit(wtnusrk) 
%sortit(btnusrk)
%sortit(wtnusbs)
%sortit(btnusbs)
%sortit(wtnusbr) 
%sortit(btnusbr)
%sortit(wtnusid) 
%sortit(btnusid)
%sortit(wtnusgr) 
%sortit(btnusgr)

data allloc_&var1 (drop = _TYPE_ _FREQ_);
	merge wtnuscy btnuscy wtnusbu btnusbu wtnusnr btnusnr 
	      wtnusst btnusst wtnussz btnussz wtnusrk btnusrk 
	      wtnusbs btnusbs wtnusbr btnusbr wtnusid btnusid
	      wtnusgr btnusgr vartot;
	by year;
	
	*Create quick macro to calculate var_est for each location;
	%macro doit2(loc);
		var_est_&loc = (var_btn_&loc + var_wtn_&loc);
	%mend;

	%doit2(cy)
	%doit2(bu)
	%doit2(nr)
	%doit2(st)
	%doit2(sz)
	%doit2(rk)
	%doit2(bs)
	%doit2(br)
	%doit2(id)
	%doit2(gr)

proc print data=allloc_&var1;
	var var_tot
	    var_est_cy var_est_bu var_est_nr 
	    var_est_st var_est_sz var_est_rk 
	    var_est_bs var_est_br var_est_id
	    var_est_gr;
	title1 'Compare these';

proc print data=allloc_&var1;
	var year var_wtn_cy var_btn_cy var_wtn_bu var_btn_bu var_wtn_st var_btn_st 
		var_wtn_nr var_btn_nr var_wtn_sz var_btn_sz
		var_wtn_rk var_btn_rk var_wtn_bs var_btn_bs 
		var_wtn_br var_btn_br 
		var_wtn_gr var_btn_gr var_tot var_wtn_id var_btn_id;
	title1 "allloc_&var1 data set"; 

proc export data=allloc_&var1 outfile="csv/table2_&var1..csv"
	dbms=csv replace;

proc datasets lib=work;
	delete brwt bswt buwt cywt grwt idwt nrwt rkwt stwt szwt
		btnusbr btnusbs btnusbu btnuscy btnusgr btnusid btnusnr btnusrk btnusst btnussz
		meanbr meanbs meanbu meancy meangr meanid meannr meanrk meanst meansz meantot
		statbr statbs statbu statcy statgr statid statnr statrk statst statsz
		sumbr sumbs sumbu sumcy sumgr sumid sumnr sumrk sumst sumsz sumus
		wtnusbr wtnusbs wtnusbu wtnuscy wtnusgr wtnusid wtnusnr wtnusrk wtnusst wtnussz
		totus vartot gs6300 		
		;
run;
quit;

%mend doitall;


****START PROGRAM****;


*Run macro to calculate variance decompositions -- shipments-weighted and purchase-weighted;

%doitall(pe)
%doitall(tvs)

run;

